package crawler.biaozhi;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.zql.entity.AgencyEntity;

import dao.AgencyDao;
import util.Location;
import util.MybatisTool;
import util.TianYanCha;
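// NOTE: "access error" (original note: 访问错误) - presumably requests to the target site were failing when this was written; TODO confirm.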
/**
 * Crawler for the Dongfeng Peugeot dealer locator at dealer.peugeot.com.cn.
 *
 * <p>The site exposes a province -> city -> dealer cascade: the index page lists
 * provinces, {@code ajax.php} returns the cities of a province and the dealers of
 * a city as {@code <option>} elements, and each dealer has a detail page under
 * {@code /dealer/<id>} from which the address and phone numbers are read.
 * Every dealer found is turned into an {@link AgencyEntity} tagged with brand
 * "标致" and manufacturer "东风标致".
 */
public class DongFengBiaoZhiCrawler {
	private static final String BASE_URL = "http://dealer.peugeot.com.cn";
	private static final String INDEX_URL = BASE_URL + "/index.php";
	private static final String AJAX_URL = BASE_URL + "/ajax.php";
	private static final String DEALER_URL_PREFIX = BASE_URL + "/dealer/";

	private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36";

	/** Cookie header sent with the index page and the ajax listing requests. */
	private static final String LISTING_COOKIE = "dealer_areacode=00026; __clickidc=150172835433848804; _ga=GA1.4.48315299.1499679516; _gid=GA1.4.1718652286.1501728354; dealer_province_id=3654; dealer_province_name=%E5%9B%BA%E5%8E%9F%E5%B8%82; __utmt=1; Hm_lvt_c768224f827094e8c0488f836379fda5=1501728354; Hm_lpvt_c768224f827094e8c0488f836379fda5=1501731858; _dc_gtm_UA-46618732-1=1; _ga=GA1.3.48315299.1499679516; _gid=GA1.3.1718652286.1501728354; _dc_gtm_UA-45190795-1=1; __utma=56009130.48315299.1499679516.1499744387.1501728354.5; __utmb=56009130.37.9.1501731886946; __utmc=56009130; __utmz=56009130.1499679516.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)";

	/**
	 * Cookie header sent with dealer detail pages. It differs from
	 * {@link #LISTING_COOKIE} only in the Hm_lpvt and __utmb values; both are
	 * kept verbatim so the requests stay exactly as they were.
	 */
	private static final String DEALER_PAGE_COOKIE = "dealer_areacode=00026; __clickidc=150172835433848804; _ga=GA1.4.48315299.1499679516; _gid=GA1.4.1718652286.1501728354; dealer_province_id=3654; dealer_province_name=%E5%9B%BA%E5%8E%9F%E5%B8%82; __utmt=1; Hm_lvt_c768224f827094e8c0488f836379fda5=1501728354; Hm_lpvt_c768224f827094e8c0488f836379fda5=1501732257; _dc_gtm_UA-46618732-1=1; _ga=GA1.3.48315299.1499679516; _gid=GA1.3.1718652286.1501728354; _dc_gtm_UA-45190795-1=1; __utma=56009130.48315299.1499679516.1499744387.1501728354.5; __utmb=56009130.43.9.1501732260538; __utmc=56009130; __utmz=56009130.1499679516.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)";

	/** Full-width colon separating a label from its value in the dealer detail list items. */
	private static final String FIELD_SEPARATOR = "：";

	/**
	 * Walks the province -> city -> dealer cascade and builds one entity per dealer.
	 *
	 * <p>A failure while listing provinces, cities or dealers stops the crawl and
	 * returns whatever was collected so far (the stack trace is printed). A failure
	 * on a single dealer only skips that dealer.
	 *
	 * @return the dealers collected; never {@code null}, possibly empty
	 */
	public List<AgencyEntity> getAgency() {
		List<AgencyEntity> list = new ArrayList<AgencyEntity>();
		try {
			// Load the index page to obtain each province's id and name.
			Document proDoc = fetchListing(INDEX_URL);
			Element proSelect = proDoc.getElementById("sp");
			if (proSelect == null) {
				System.err.println("Province selector 'sp' not found on " + INDEX_URL);
				return list;
			}
			Elements proOptions = proSelect.getElementsByTag("option");
			// All three loops start at index 1: the first <option> is skipped
			// (presumably a placeholder entry - TODO confirm against the live page).
			for (int i = 1; i < proOptions.size(); i++) {
				String proName = proOptions.get(i).text();
				String proNum = proOptions.get(i).attr("value");
				// Use the province id to obtain the ids and names of its cities.
				Document cityDoc = fetchListing(AJAX_URL + "?pid=" + proNum + "&action=city");
				Elements cityOptions = cityDoc.getElementsByTag("option");
				for (int j = 1; j < cityOptions.size(); j++) {
					String cityName = cityOptions.get(j).text();
					String cityNum = cityOptions.get(j).attr("value");
					// Use the city id to obtain the dealers located in it.
					Document dealerDoc = fetchListing(AJAX_URL + "?cid=" + cityNum + "&action=dealer");
					Elements dealerOptions = dealerDoc.getElementsByTag("option");
					for (int k = 1; k < dealerOptions.size(); k++) {
						String dealerNum = dealerOptions.get(k).attr("value");
						String name = dealerOptions.get(k).text();
						try {
							AgencyEntity agency = fetchAgency(dealerNum, name, proName, cityName);
							if (agency != null) {
								list.add(agency);
							}
						} catch (Exception e) {
							// Broad catch on purpose: the page fetch, the geocoding helper and the
							// shareholder lookup can each fail, and one bad dealer must not end the crawl.
							System.err.println("Skipping dealer " + dealerNum + " (" + name + "): " + e);
							e.printStackTrace();
						}
					}
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return list;
	}

	/**
	 * Fetches the index page or an ajax listing with the browser-like headers the
	 * site is sent (Referer, Origin, Cookie, User-Agent).
	 *
	 * @param url absolute URL to load
	 * @return the parsed response body
	 * @throws IOException if the request fails or returns an HTTP error status
	 */
	private static Document fetchListing(String url) throws IOException {
		return Jsoup.connect(url)
				.header("Referer", INDEX_URL)
				.header("Origin", BASE_URL)
				.header("Cookie", LISTING_COOKIE)
				.header("User-Agent", USER_AGENT)
				.get();
	}

	/**
	 * Loads one dealer's detail page and builds the entity for it.
	 *
	 * @param dealerNum dealer id, appended to the {@code /dealer/} URL
	 * @param name      dealer name as shown in the dealer drop-down
	 * @param proName   name of the province the dealer was listed under
	 * @param cityName  name of the city the dealer was listed under
	 * @return the populated entity, or {@code null} when the page does not answer
	 *         with HTTP 200 or lacks the expected contact-details container
	 * @throws Exception if the request, the geocoding helper or the shareholder
	 *                   lookup fails; declared broadly because the project helpers'
	 *                   signatures are not visible from this class
	 */
	private AgencyEntity fetchAgency(String dealerNum, String name, String proName, String cityName)
			throws Exception {
		// Single request: inspect the status code and parse the same response, instead of
		// probing with a separate HttpURLConnection (which was never disconnected).
		Response response = Jsoup.connect(DEALER_URL_PREFIX + dealerNum)
				.header("Cookie", DEALER_PAGE_COOKIE)
				.header("User-Agent", USER_AGENT)
				.header("Referer", INDEX_URL)
				.ignoreHttpErrors(true)
				.execute();
		if (response.statusCode() != 200) {
			return null;
		}
		Document doc = response.parse();

		// NOTE(review): a multi-class string is passed as one class name; verify this
		// matches the container with the jsoup version in use.
		Elements div = doc.getElementsByClass("col-xs-12 col-sm-4 position_right");
		if (div.size() == 0) {
			return null;
		}
		Elements lis = div.get(0).getElementsByTag("li");
		// List items 1, 2 and 3 are read as address, sales phone and service phone.
		String address = fieldValue(lis, 1, "");
		String sellTell = fieldValue(lis, 2, null);
		String serviceTell = fieldValue(lis, 3, null);

		String[] lngAndLat = Location.getLocation(address);
		String lng = lngAndLat[0];
		String lat = lngAndLat[1];
		List<String> shareholder = TianYanCha.getShareholder(name);
		String sControllingShareholder = shareholder.get(0);
		String sOtherShareholders = shareholder.get(1);

		AgencyEntity agency = new AgencyEntity();
		agency.setdCloseDate(null);
		agency.setdOpeningDate(null);
		agency.setdUpdateTime(new Timestamp(System.currentTimeMillis()));
		agency.setnBrandID(-1);
		agency.setsBrand("标致");

		agency.setnDealerIDWeb(-1);
		agency.setnManufacturerID(-1);
		agency.setsManufacturer("东风标致");

		agency.setnState(1);
		agency.setsAddress(address);
		agency.setsCity(cityName);
		agency.setsCustomerServiceCall(serviceTell);
		agency.setsDealerName(name);
		agency.setsDealerType(null);
		agency.setsProvince(proName);
		agency.setsSaleCall(sellTell);
		agency.setsLongitude(lng);
		agency.setsLatitude(lat);
		agency.setsControllingShareholder(sControllingShareholder);
		agency.setsOtherShareholders(sOtherShareholders);
		return agency;
	}

	/**
	 * Extracts the value part of a "label：value" list item.
	 *
	 * @param items    list items of the dealer detail block
	 * @param index    position of the wanted item
	 * @param fallback value returned when the item is missing or has no value part
	 * @return the text between the first and second separator, or {@code fallback}
	 */
	private static String fieldValue(Elements items, int index, String fallback) {
		if (index >= items.size()) {
			return fallback;
		}
		String[] parts = items.get(index).text().split(FIELD_SEPARATOR);
		return parts.length > 1 ? parts[1] : fallback;
	}

	/**
	 * Runs the crawl and saves every collected dealer through {@link MybatisTool}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		DongFengBiaoZhiCrawler crawler = new DongFengBiaoZhiCrawler();
		System.out.println("爬虫开始...");
		List<AgencyEntity> agencys = crawler.getAgency();
		System.out.println("抓取完毕,正在存库");
		try {
			for (AgencyEntity agency : agencys) {
				MybatisTool.save(agency);
			}
		} finally {
			// Close even when a save fails part-way through.
			MybatisTool.close();
		}
		System.out.println("请查看数据库");
	}

}
